# -*- coding: utf-8 -*-
import scrapy
from scrapy.spiders import CrawlSpider,Rule
from scrapy.linkextractors import LinkExtractor

from dirbot.items import ImageItem


class TtSpider(CrawlSpider):
    """Crawl xiaohuar.com from ``/hua/`` and collect image URLs.

    Links matching ``p-1-<n>.html`` are only followed; pages matching
    ``s-1-<n>.html`` are followed and also handed to :meth:`parse_item`,
    which yields an ``ImageItem`` carrying the page's image URL.
    """

    name = 'tt'
    allowed_domains = ['xiaohuar.com']
    start_urls = ['http://www.xiaohuar.com/hua/']
    rules = (
        # URLs matching 'p-1-<n>.html': no callback is given, so these links
        # are simply followed (follow defaults to True without a callback).
        Rule(LinkExtractor(allow=(r'p-1-\d+\.html',))),
        # URLs matching 's-1-<n>.html': scrape them with parse_item and keep
        # following the links found on those pages.
        Rule(
            LinkExtractor(allow=(r's-1-\d+\.html',)),
            follow=True,
            callback='parse_item',
        ),
    )

    def parse_item(self, response):
        """Build an ``ImageItem`` from the ``.ad-image`` element of a page.

        Args:
            response: The Scrapy response for a page matched by the
                ``s-1-<n>.html`` rule.

        Returns:
            An ``ImageItem`` whose ``image_urls`` field is a one-element list
            holding the absolute image URL, or ``None`` when the page has no
            ``img`` directly under an ``.ad-image`` element.
        """
        src = response.css('.ad-image').xpath('img/@src').extract_first()
        if not src:
            # extract_first() returns None when nothing matches; skip the
            # page instead of failing on string concatenation.
            self.logger.warning('No .ad-image image found on %s', response.url)
            return None

        item = ImageItem()
        # urljoin resolves the src against the page URL, so root-relative,
        # relative and already-absolute sources all produce a valid URL.
        item['image_urls'] = [response.urljoin(src)]
        self.logger.debug('image_urls: %s', item['image_urls'])
        return item